In [ ]:
# Warning: you don't always want to do this, it will clutter your namespace
# We do it today for simplicity of presentation
%pylab notebook
Attribution: this notebook is based on slides created by Roland Memisevic. Some of the material related to classes was taken from http://www.stavros.io/tutorials/python/.
“Python is a general-purpose, high-level programming language whose design philosophy emphasizes code readability. Python claims to combine ‘remarkable power with very clear syntax’, and its standard library is large and comprehensive. Its use of indentation for block delimiters is unique among popular programming languages.” -- Wikipedia
Some features of Python:
There are various implementations of Python available, and several (incompatible) versions. The most common, Python 2.x (currently 2.7), is suitable for most of our data processing needs.
The "IPython" interactive shell is highly recommended for any kind of interactive work! Note that here, I'm using the IPython notebook.
In [ ]:
print "hello world"
In [ ]:
print "hello world", 1, 2, 1 + 2
In [ ]:
a = 1
b = 1
print "hello world", a + b
In [ ]:
a = 1
b = "hello"
# Adding an int to a str is not defined, so `a + b` raises a TypeError:
# Python does not silently convert between numbers and strings.
print "hello world", a + b
In [ ]:
b = "world"
# The % operator substitutes b for the %s placeholder in the format string.
print "hello %s" % b
In [ ]:
# A tuple is written with parentheses and may mix element types.
T = (1, 2, 3, "hello")
print T[0]
Note that in Python, indexing starts at 0!
In [ ]:
L = [1, 2, 3, "hello"]
L[0] = "Lists are mutable"
print L[0]
In [ ]:
# append() adds the item to the end of the list in place.
L.append("goodbye")
# Negative indices count from the end, so L[-1] is the item just appended.
print L[-1]
In [ ]:
# A dictionary maps keys to values; here the keys are strings.
D = {"a": 1, "b": 2}
# Look a value up by its key.
print D["a"]
In [ ]:
def timesfour(x):
    """Return ``x`` multiplied by four.

    Works for anything that supports ``4 * x``: numbers are scaled, while
    sequences such as strings and lists are repeated four times.
    """
    return 4 * x
print timesfour(2)
In [ ]:
s = "z"
if s == "y":
print "y"
elif s == "z":
print "z"
else:
print "b"
In [ ]:
a = 1.0
s = "hello"
# Repeat while both conditions hold. Nothing visible here changes s, so the
# loop ends once a has been incremented to 10.0.
# NOTE(review): block indentation was lost in this listing; the loop body must
# be indented before this runs -- confirm whether `print a` belongs inside the
# loop (prints every step) or after it (prints only the final value).
while a != 10.0 and s == "hello":
a = a + 1.0
print a
In [ ]:
for i in range(3):
print i
In [ ]:
for i in [1, 2, 'x', 3, 4, 'h', 5]:
print i
In [ ]:
type(timesfour)
In [ ]:
# At this point try tab completion on timesfour.
timesfour
In [ ]:
# here "object" specifies the superclass
class MyClass(object):
    """Small example class with one class attribute and one instance attribute."""

    # Class attribute: stored on the class, so every instance sees the same
    # value until an instance assigns its own attribute of the same name.
    common = 10

    # this is a constructor
    def __init__(self):
        # Instance attribute: each object gets its own copy.
        self.myvariable = 3

    def myfunction(self, arg1, arg2):
        """Return ``self.myvariable``; ``arg1`` and ``arg2`` are unused."""
        return self.myvariable
classinstance = MyClass()
# note that arguments passed to myfunction are ignored
print 'classinstance.myfunction(1, 2): %s' % classinstance.myfunction(1, 2)
# This variable is shared by all instances of the class.
classinstance2 = MyClass()
print 'classinstance.common: %s' % classinstance.common
print 'classinstance2.common: %s' % classinstance2.common
In [ ]:
# Note how we use the class name instead of the instance.
MyClass.common = 30
print 'classinstance.common: %s' % classinstance.common
print 'classinstance2.common: %s' % classinstance2.common
In [ ]:
# This will not update the variable on the class.
# Instead, it creates a new attribute on this one instance
# that shadows the class attribute of the same name.
classinstance.common = 10
print 'classinstance.common: %s' % classinstance.common
print 'classinstance2.common: %s' % classinstance2.common
In [ ]:
MyClass.common = 50
# This has not changed, because classinstance.common is
# now an instance variable.
print 'classinstance.common: %s' % classinstance.common
# but this has changed
print 'classinstance2.common: %s' % classinstance2.common
In [ ]:
# This class inherits from MyClass. The example
# class above inherits from "object", which makes
# it what's called a "new-style class".
# You can read more about these here: http://stackoverflow.com/a/54873
# Multiple inheritance is declared as:
# class OtherClass(MyClass1, MyClass2, MyClassN)
class OtherClass(MyClass):
# The "self" argument is passed automatically
# and refers to the class instance, so you can set
# instance variables as above, but from inside the class.
def __init__(self, arg1):
self.myvariable = 3
print "arg1: %s" % arg1
In [ ]:
classinstance = OtherClass("hello")
print 'classinstance.myfunction(1, 2): %s' % classinstance.myfunction(1, 2)
In [ ]:
# This class doesn't have a .test member, but
# we can add one to the instance anyway. Note
# that this will only be a member of classinstance.
classinstance.test = 10
print 'classinstance.test: %s' % classinstance.test
However, note that we can get pretty far without needing classes!
In [ ]:
a = [1,2,3]
help(a)
help expects the object you need help about. Just instantiate one, if you do not have it!
The ? operator is also useful (only works in IPython):
In [ ]:
list?
IPython also has a number of so-called "magic" functions: e.g. %autoreload, %paste, %debug, %hist, %timeit. Type %magic at a prompt to learn more.
In [ ]:
%lsmagic
In [ ]:
%%writefile mytest.py
for i in xrange(3):
print "hello %d" % i
In [ ]:
%run mytest.py
In [ ]:
import datetime
# imports a single object
from datetime import date
# imports everything (careful, pollutes namespace)
from datetime import *
Remember, everything is an object. Access the contents of modules accordingly.
In [ ]:
# After this import the name `datetime` refers to the class, not the module.
from datetime import datetime
datetime.now()
# The attribute is looked up on the class itself rather than on an instance.
# NOTE(review): if the current month was intended, use datetime.now().month.
myobject = datetime.month
print myobject
"pylab" =
Two ways to use:
Use `import pylab` from a shell, or `from pylab import *`, to get direct access to the most important functions and objects.
Start IPython with `--pylab` to give you an environment that contains all the necessary goodies. I alias `ipy` to `ipython --pylab` in my shell, e.g. `alias ipy="ipython --pylab"`.

numpy.array

After doing `ipython --pylab` or `from pylab import *; from numpy import *` from within Python, we have access to things like:
In [ ]:
array([1,2,3])
In [ ]:
ones((2,3))
In [ ]:
zeros((3,2))
In [ ]:
eye(4)
In [ ]:
# draw from normal distribution with mean 0, std 1
randn(2,2)
There are many other useful commands built in, e.g. load(), save(), loadtxt(), etc.
Numpy arrays have many useful member components:
In [ ]:
# A 2 x 3 array built from a nested list (one inner list per row).
a = np.array([[1,2,3],[4,5,6]])
print a
# .T is the transpose: rows and columns are swapped, giving a 3 x 2 array.
print a.T
In [ ]:
# Mean along axis 0 (down the rows): one value per column.
a.mean(0)
In [ ]:
# Mean along axis 1 (across the columns): one value per row.
a.mean(1)
In [ ]:
a.mean()
In [ ]:
a.std(0)
In [ ]:
a.max()
One of the most useful array attributes is `shape`:
In [ ]:
print randn(3,3).shape
print randn(2,5,7,3).shape
print array([1,2,3,4]).shape
In [ ]:
a = array([1,2,3])
print a[0]
print a[1]
In [ ]:
a = array([[1,2,3],[4,5,6]])
print a[0,0]
print a[1,2]
In [ ]:
a = randn(2,3,4)
print a
print a[0,0,0]
print a[0,1,3]
In [ ]:
# A 3 x 4 array to slice; `start:stop` ranges exclude the stop index.
a = array([[1,2,3,4],[5,6,7,8],[9,10,11,12]])
print a
print a[0, :] # a "slice": the whole first row
print a[:, 0] # another "slice": the whole first column
print a[1:3, 0] # another "slice": rows 1 and 2 of the first column
print a[1:3, :] # another "slice" (this is a 2-d block): rows 1 and 2, all columns
In [ ]:
# Sample the interval [0, pi) in steps of pi/180, i.e. one point per degree.
x = arange(0,pi,pi/180)
# cos() is applied element-wise, so x and cos(x) have the same length.
plot(x, cos(x))
In [ ]:
# Two clouds of 20 random 2-d points each: the first is centred on (1, 1)
# with standard deviation 0.1, the second on (-1, -1) with 0.5.
x1 = 1 + 0.1*randn(20)
y1 = 1 + 0.1*randn(20)
x2 = -1 + 0.5*randn(20)
y2 = -1 + 0.5*randn(20)
scatter(x1,y1)
# Draw the second cloud as red crosses so the two are easy to tell apart.
scatter(x2,y2,c='r',marker='x')
# Fix both axes to the same range.
xlim(-2,2)
ylim(-2,2)
legend(['class 1', 'class2'], loc='lower right')
In [ ]:
# Left panel of a 1 x 2 grid: histogram of 200 standard-normal samples.
subplot(1,2,1)
hist(randn(200),bins=10)
ylabel('counts')
xlabel('bins')
# Right panel: box-and-whisker plot of a fresh set of 200 samples.
subplot(1,2,2)
boxplot(randn(200))
title('box and whisker plot')
In a box and whisker plot, the box extends from the lower to upper quartile values of the data, with a line at the median. The whiskers extend from the box to show the range of the data. Outlier points are those past the end of the whiskers.
How do you add a $2 \times 5$ matrix and a $1 \times 5$ vector?
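In MATLAB, you would use repmat (or, for the pros, bsxfun). In Python you can use NumPy's repmat equivalent, called tile, although, as the next cell shows, NumPy's broadcasting handles this case without any explicit copying.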
In [ ]:
# Broadcasting: the (1, 5) row is stretched along the first axis to match
# (2, 5), so the sum has shape (2, 5).
print (randn(2,5) + randn(1,5)).shape
How about a $2 \times 5$ matrix plus a $1 \times 5 \times 3$ tensor?
In [ ]:
print randn(2,5).shape
# Shapes (2, 5) and (1, 5, 3) cannot be broadcast together: shapes are
# aligned from the last axis, and 5 does not match 3, so this addition
# raises a ValueError.
print randn(2,5) + randn(1,5,3)
We need to make the number of dimensions match. Solution: Numpy's newaxis
In [ ]:
# Indexing with newaxis appends a length-1 axis: (2, 5) becomes (2, 5, 1).
print randn(2,5)[:,:,newaxis].shape
# (2, 5, 1) and (1, 5, 3) now broadcast to (2, 5, 3).
print (randn(2,5)[:,:,newaxis] + randn(1,5,3)).shape
In [ ]:
import numpy as np
import matplotlib.pyplot as plt
Then I use np to access Numpy methods and plt to access plotting methods.
Python has its own style convention called "PEP8". This is a great resource to check out if you are unsure about how to format your code. There are also command-line tools (e.g. pep8) which will verify your code against the standard. Most serious open-source projects will expect you to adhere to the PEP8 standard before contributing code.
In [ ]:
from IPython.display import YouTubeVideo
# a short video about using NumPy arrays, from Enthought
YouTubeVideo('vWkb7VahaXQ')